{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "4546394f-26fe-4a87-a013-dcc3e7916a19",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "changmuxingzi 123\n"
     ]
    }
   ],
   "source": [
    "print(\"changmuxingzi 123\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "eef6e6b1-7847-479f-b9eb-dbc2b8683e18",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'float'>\n"
     ]
    }
   ],
   "source": [
    "name = \"luoyaosheng\"\n",
    "number = 123\n",
    "round_num = 123.0\n",
    "print(type(round_num))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "e1ed66ca-b27b-471d-8113-69fe29411e75",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'luoyaosheng'"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "e9db390b-2f32-41c1-9414-3e4f8be38b21",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "gesaol\n"
     ]
    }
   ],
   "source": [
    "print(name[::-2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "93d82290-c4de-49eb-9ff2-eac991d08558",
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "876bcb805631f80",
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "# Posts with a 'sentiment' column (tagged by an LLM, judging by the file name).\n",
    "data_with_sentiment = pd.read_csv(\n",
    "    './data/taged_data_by_llm.csv',\n",
    ")\n",
    "\n",
    "# Cleaned original posts; low_memory=False makes pandas parse the file in one pass\n",
    "# so each column's dtype is inferred from the whole column.\n",
    "data_original = pd.read_csv(\n",
    "    \"./data/stock_posts_hk01810_cleaned.csv\",\n",
    "    low_memory=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "72b2b1ea-0dc2-4297-9461-e39d9c6395aa",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the original posts whose post_id also appears in the sentiment table.\n",
    "# .copy() gives new_data its own storage, so adding a column below does not\n",
    "# raise SettingWithCopyWarning.\n",
    "is_tagged = data_original['post_id'].isin(data_with_sentiment['post_id'])\n",
    "new_data = data_original.loc[is_tagged].copy()\n",
    "\n",
    "# Look each sentiment up by post_id rather than by row label: a boolean mask built\n",
    "# on data_original must not be used to index data_with_sentiment, and aligning on\n",
    "# the row index can attach a sentiment to the wrong post.\n",
    "sentiment_by_post_id = (\n",
    "    data_with_sentiment\n",
    "    .drop_duplicates(subset='post_id')  # first tag wins; Series.map needs unique keys\n",
    "    .set_index('post_id')['sentiment']\n",
    ")\n",
    "new_data['sentiment'] = new_data['post_id'].map(sentiment_by_post_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "dfbb497b-ad80-4a38-8154-4bac561172a4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>post_id</th>\n",
       "      <th>post_title</th>\n",
       "      <th>publish_date</th>\n",
       "      <th>post_url</th>\n",
       "      <th>stockbar_code</th>\n",
       "      <th>stockbar_name</th>\n",
       "      <th>user_id</th>\n",
       "      <th>user_nickname</th>\n",
       "      <th>user_is_majia</th>\n",
       "      <th>post_click_count</th>\n",
       "      <th>post_comment_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1538052065</td>\n",
       "      <td>这垃圾终究会回到原点</td>\n",
       "      <td>2025-04-10 00:23:00</td>\n",
       "      <td>https://guba.eastmoney.com/news,hk01810,153805...</td>\n",
       "      <td>hk01810</td>\n",
       "      <td>小米集团-W吧</td>\n",
       "      <td>2.043094e+15</td>\n",
       "      <td>楼观天下81825</td>\n",
       "      <td>False</td>\n",
       "      <td>67</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1538050762</td>\n",
       "      <td>问deepseek, 把新能源江湖代入三国,朱江明竟然是....?</td>\n",
       "      <td>2025-04-10 00:07:54</td>\n",
       "      <td>https://guba.eastmoney.com/news,hk09863,153805...</td>\n",
       "      <td>hk09863</td>\n",
       "      <td>零跑汽车吧</td>\n",
       "      <td>7.913077e+15</td>\n",
       "      <td>追风瓜牛</td>\n",
       "      <td>False</td>\n",
       "      <td>63</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1538050424</td>\n",
       "      <td>见证历史!南向资金单日净买入额创历史新高</td>\n",
       "      <td>2025-04-10 00:04:10</td>\n",
       "      <td>https://guba.eastmoney.com/news,hk00317,153805...</td>\n",
       "      <td>hk00317</td>\n",
       "      <td>中船防务吧</td>\n",
       "      <td>8.768014e+15</td>\n",
       "      <td>中船防务资讯</td>\n",
       "      <td>True</td>\n",
       "      <td>9796</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1538048476</td>\n",
       "      <td>真的麻了,别的都在纷纷买入维稳股价,唯独这个只有吸血硬靠带起来,啥都不是</td>\n",
       "      <td>2025-04-09 23:47:17</td>\n",
       "      <td>https://guba.eastmoney.com/news,hk01810,153804...</td>\n",
       "      <td>hk01810</td>\n",
       "      <td>小米集团-W吧</td>\n",
       "      <td>3.427386e+15</td>\n",
       "      <td>刺客哑哑</td>\n",
       "      <td>False</td>\n",
       "      <td>88</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1538047989</td>\n",
       "      <td>大东已经足够强大!团结一心打响保卫战</td>\n",
       "      <td>2025-04-09 23:43:05</td>\n",
       "      <td>https://guba.eastmoney.com/news,hk01810,153804...</td>\n",
       "      <td>hk01810</td>\n",
       "      <td>小米集团-W吧</td>\n",
       "      <td>7.308514e+15</td>\n",
       "      <td>港股通ATM提款机</td>\n",
       "      <td>False</td>\n",
       "      <td>69</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>294</th>\n",
       "      <td>1537670008</td>\n",
       "      <td>片子营销</td>\n",
       "      <td>2025-04-09 10:34:47</td>\n",
       "      <td>https://guba.eastmoney.com/news,hk01810,153767...</td>\n",
       "      <td>hk01810</td>\n",
       "      <td>小米集团-W吧</td>\n",
       "      <td>3.746346e+15</td>\n",
       "      <td>股友tJf9UD1385</td>\n",
       "      <td>False</td>\n",
       "      <td>77</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>295</th>\n",
       "      <td>1537667699</td>\n",
       "      <td>响应国家号召,干!</td>\n",
       "      <td>2025-04-09 10:33:08</td>\n",
       "      <td>https://guba.eastmoney.com/news,hk01810,153766...</td>\n",
       "      <td>hk01810</td>\n",
       "      <td>小米集团-W吧</td>\n",
       "      <td>2.221094e+15</td>\n",
       "      <td>MAX8000</td>\n",
       "      <td>False</td>\n",
       "      <td>71</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>296</th>\n",
       "      <td>1537664313</td>\n",
       "      <td>必须赢</td>\n",
       "      <td>2025-04-09 10:30:50</td>\n",
       "      <td>https://guba.eastmoney.com/news,hk01810,153766...</td>\n",
       "      <td>hk01810</td>\n",
       "      <td>小米集团-W吧</td>\n",
       "      <td>1.323326e+15</td>\n",
       "      <td>有创造力的尹晔3</td>\n",
       "      <td>False</td>\n",
       "      <td>20</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>297</th>\n",
       "      <td>1537663604</td>\n",
       "      <td>恒生指数科技涨逾1% 小米集团涨逾6%</td>\n",
       "      <td>2025-04-09 10:30:21</td>\n",
       "      <td>https://guba.eastmoney.com/news,hk01810,153766...</td>\n",
       "      <td>hk01810</td>\n",
       "      <td>小米集团-W吧</td>\n",
       "      <td>6.464095e+15</td>\n",
       "      <td>小米集团W资讯</td>\n",
       "      <td>True</td>\n",
       "      <td>563</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>298</th>\n",
       "      <td>1537663622</td>\n",
       "      <td>部分港股科网股反弹 小米集团涨近5%</td>\n",
       "      <td>2025-04-09 10:30:21</td>\n",
       "      <td>https://guba.eastmoney.com/news,hk01810,153766...</td>\n",
       "      <td>hk01810</td>\n",
       "      <td>小米集团-W吧</td>\n",
       "      <td>6.464095e+15</td>\n",
       "      <td>小米集团W资讯</td>\n",
       "      <td>True</td>\n",
       "      <td>1182</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>297 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        post_id                            post_title         publish_date  \\\n",
       "0    1538052065                            这垃圾终究会回到原点  2025-04-10 00:23:00   \n",
       "1    1538050762     问deepseek, 把新能源江湖代入三国,朱江明竟然是....?  2025-04-10 00:07:54   \n",
       "2    1538050424                  见证历史!南向资金单日净买入额创历史新高  2025-04-10 00:04:10   \n",
       "3    1538048476  真的麻了,别的都在纷纷买入维稳股价,唯独这个只有吸血硬靠带起来,啥都不是  2025-04-09 23:47:17   \n",
       "4    1538047989                    大东已经足够强大!团结一心打响保卫战  2025-04-09 23:43:05   \n",
       "..          ...                                   ...                  ...   \n",
       "294  1537670008                                  片子营销  2025-04-09 10:34:47   \n",
       "295  1537667699                             响应国家号召,干!  2025-04-09 10:33:08   \n",
       "296  1537664313                                   必须赢  2025-04-09 10:30:50   \n",
       "297  1537663604                   恒生指数科技涨逾1% 小米集团涨逾6%  2025-04-09 10:30:21   \n",
       "298  1537663622                    部分港股科网股反弹 小米集团涨近5%  2025-04-09 10:30:21   \n",
       "\n",
       "                                              post_url stockbar_code  \\\n",
       "0    https://guba.eastmoney.com/news,hk01810,153805...       hk01810   \n",
       "1    https://guba.eastmoney.com/news,hk09863,153805...       hk09863   \n",
       "2    https://guba.eastmoney.com/news,hk00317,153805...       hk00317   \n",
       "3    https://guba.eastmoney.com/news,hk01810,153804...       hk01810   \n",
       "4    https://guba.eastmoney.com/news,hk01810,153804...       hk01810   \n",
       "..                                                 ...           ...   \n",
       "294  https://guba.eastmoney.com/news,hk01810,153767...       hk01810   \n",
       "295  https://guba.eastmoney.com/news,hk01810,153766...       hk01810   \n",
       "296  https://guba.eastmoney.com/news,hk01810,153766...       hk01810   \n",
       "297  https://guba.eastmoney.com/news,hk01810,153766...       hk01810   \n",
       "298  https://guba.eastmoney.com/news,hk01810,153766...       hk01810   \n",
       "\n",
       "    stockbar_name       user_id user_nickname user_is_majia  post_click_count  \\\n",
       "0         小米集团-W吧  2.043094e+15     楼观天下81825         False                67   \n",
       "1           零跑汽车吧  7.913077e+15          追风瓜牛         False                63   \n",
       "2           中船防务吧  8.768014e+15        中船防务资讯          True              9796   \n",
       "3         小米集团-W吧  3.427386e+15          刺客哑哑         False                88   \n",
       "4         小米集团-W吧  7.308514e+15     港股通ATM提款机         False                69   \n",
       "..            ...           ...           ...           ...               ...   \n",
       "294       小米集团-W吧  3.746346e+15  股友tJf9UD1385         False                77   \n",
       "295       小米集团-W吧  2.221094e+15       MAX8000         False                71   \n",
       "296       小米集团-W吧  1.323326e+15      有创造力的尹晔3         False                20   \n",
       "297       小米集团-W吧  6.464095e+15       小米集团W资讯          True               563   \n",
       "298       小米集团-W吧  6.464095e+15       小米集团W资讯          True              1182   \n",
       "\n",
       "     post_comment_count  \n",
       "0                     2  \n",
       "1                     0  \n",
       "2                     1  \n",
       "3                     3  \n",
       "4                     0  \n",
       "..                  ...  \n",
       "294                   1  \n",
       "295                   1  \n",
       "296                   0  \n",
       "297                   2  \n",
       "298                  22  \n",
       "\n",
       "[297 rows x 11 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "012b8f0c-171f-4ab9-adb8-148c094d5091",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Chinese word (key) -> English translation (value).\n",
    "lan_dict = {\n",
    "    \"跑\": \"run\",\n",
    "    \"跳\": \"jump\",\n",
    "    \"首页\": \"Home\",\n",
    "}\n",
    "\n",
    "\n",
    "def get_language(chinese_name):\n",
    "    \"\"\"Print the English translation stored in ``lan_dict`` for ``chinese_name``.\n",
    "\n",
    "    Raises KeyError when ``chinese_name`` is not a key of ``lan_dict``.\n",
    "    \"\"\"\n",
    "    english_word = lan_dict[chinese_name]\n",
    "    print(english_word)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "2433862e-d0f9-49a6-9ad5-837af3a5c3d3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Home\n"
     ]
    }
   ],
   "source": [
    "get_language(\"首页\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "c90e61db-70c1-46c5-a873-feb6693ed0cc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "1\n",
      "2\n",
      "3\n",
      "4\n"
     ]
    }
   ],
   "source": [
    "for i in range(0, 5):\n",
    "    print(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e20e9d28-f879-4cc3-90b2-741695b36978",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "1971d927-65e1-472d-8f44-573c18b744ea",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "1\n",
      "2\n",
      "3\n",
      "4\n"
     ]
    }
   ],
   "source": [
    "for i in [0, 1, 2, 3, 4]:\n",
    "    print(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "17d5eaf7-db76-4abb-8551-43ed66d9c007",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0, 1, 2, 3]\n"
     ]
    }
   ],
   "source": [
    "print([0,1,2,3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "9cc28916-5c9d-4dde-9e2c-b00a7028e3e3",
   "metadata": {},
   "outputs": [],
   "source": [
    "square = [0, 1, 2, 3, 4]\n",
    "# Overwrite every slot in place, visiting the positions by index.\n",
    "for i in range(len(square)):\n",
    "    square[i] = 'white'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "a8e9a597-0d72-4a2a-97c9-dde72d5a8233",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "file = ['1.pdf', '2.pdf']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "64702f6a-a362-4a5a-9b22-bbf1b871fff2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 1.pdf\n",
      "1 2.pdf\n"
     ]
    }
   ],
   "source": [
    "for index, file_name in enumerate(file):\n",
    "    print(index, file_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "ed3149fb-082e-4365-b122-9a5a05872417",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.pdf\n",
      "2.pdf\n"
     ]
    }
   ],
   "source": [
    "for i in file:\n",
    "    print(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "e701029b-3225-423d-934c-79f308e98608",
   "metadata": {},
   "outputs": [],
   "source": [
    "def changmusay(a):\n",
    "    \"\"\"Print the sentence \"changmuxingzi loves <a>\" for the given ``a``.\"\"\"\n",
    "    sentence = f\"changmuxingzi loves {a}\"\n",
    "    print(sentence)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "9190ea52-f972-423c-8434-ba94447f1773",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "changmuxingzi loves u\n"
     ]
    }
   ],
   "source": [
    "changmusay(\"u\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "id": "2f55d4b4-ac0d-4550-9601-0fadae3b307d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def add1(a):\n",
    "    \"\"\"Return ``a`` increased by one.\"\"\"\n",
    "    return a + 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "id": "9c4dc533-522a-4d8f-9fc8-878ae2c6267a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "123\n",
      "2 plus 1 equals 3\n",
      "3\n"
     ]
    }
   ],
   "source": [
    "print(add1(2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "607d60ac-14ca-462f-826d-2f2aea291e96",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3\n"
     ]
    }
   ],
   "source": [
    "print(add1(2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "bc01f2a6-08c0-4188-92ce-a31b5dd58869",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3\n"
     ]
    }
   ],
   "source": [
    "print( add1(2)  )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "id": "5f435921-e4d3-4e71-8923-c3350ae63016",
   "metadata": {},
   "outputs": [],
   "source": [
    "add1(2)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "id": "2487cb0f-f951-4dcb-a990-eb8799f5fa3f",
   "metadata": {},
   "outputs": [],
   "source": [
    "a = 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "id": "1d031665-76db-4be4-8376-4f07d843716f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 92,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "add1(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1eceef00-739c-44bd-8f9f-8d6a351ad12f",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.21"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
